{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c1ed389-03b2-4725-a908-03180a76c578",
   "metadata": {},
   "outputs": [],
   "source": [
    "suppressMessages(library(ArchR))\n",
    "suppressMessages(library(Seurat))\n",
    "suppressMessages(library(Signac))\n",
    "suppressMessages(library(dplyr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e38bb6e5-b45a-4e0e-9404-8544069493c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "obj.atac <- readRDS(\"../../DataIntegration/data/VisiumHeart/snATAC.annotated.Rds\")\n",
    "obj.rna <- readRDS(\"../../../snRNA/from_rico/integrated_snrnaseq/integrated_rnasamples_ann.rds\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2e3e8764-ebb2-48e7-af5a-aa2bace788d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "obj.atac"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3d984058-9c6e-4696-b9e6-1bd98a923137",
   "metadata": {},
   "outputs": [],
   "source": [
    "obj.rna"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e3f6ef6-83c9-463e-9b94-da0a2ebd3b26",
   "metadata": {},
   "outputs": [],
   "source": [
    "## update the same cell name\n",
    "obj.rna$cell_type <- stringr::str_replace_all(obj.rna$cell_type,\n",
    "                                              c(\"PC\" = \"Pericyte\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "738b2b2e-a800-4728-b486-2385fc097daa",
   "metadata": {},
   "outputs": [],
   "source": [
    "obj.rna$Sample <- obj.rna$orig.ident"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ccfba885-d247-45d2-a09b-42e757108bc6",
   "metadata": {},
   "outputs": [],
   "source": [
    "length(unique(obj.rna$Sample))\n",
    "length(unique(obj.atac$Sample))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0851bd00-cdcc-4e94-8b08-c154487bc539",
   "metadata": {},
   "outputs": [],
   "source": [
    "table(obj.atac$cell_type)\n",
    "table(obj.rna$cell_type)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2340e845-d1fd-4028-a614-127b22264338",
   "metadata": {},
   "outputs": [],
   "source": [
    "cols <- ArchR::paletteDiscrete(obj.rna$cell_type)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a7a92b29-6630-4b64-8209-308cb034eb3c",
   "metadata": {},
   "outputs": [],
   "source": [
    "p1 <- DimPlot(obj.rna, reduction = \"umap_harmony\", group.by = \"cell_type\",\n",
    "             raster = TRUE, size = 0.1) +\n",
    "    scale_color_manual(values = cols) +\n",
    "    ggtitle(\"snRNA-seq\")\n",
    "    \n",
    "p2 <- DimPlot(obj.atac, reduction = \"umap_harmony_v2\", group.by = \"cell_type\",\n",
    "             size = 0.1, raster = TRUE) +\n",
    "    scale_color_manual(values = cols) +\n",
    "    ggtitle(\"snATAC-seq\")\n",
    "\n",
    "options(repr.plot.height = 5, repr.plot.width = 12)\n",
    "\n",
    "p1 + p2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "345a79fa-7fd5-42a7-acfd-58f7b249f73d",
   "metadata": {},
   "outputs": [],
   "source": [
    "## remove prolif. cells from snRNA\n",
    "Idents(obj.rna) <- \"cell_type\"\n",
    "obj.rna <- subset(obj.rna, idents = \"prolif\", invert = TRUE)\n",
    "obj.rna"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "46fa0084-2eee-4626-8bb7-524e1d8e37c9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# add meta data for snRNA-seq\n",
    "df <- read.csv(\"../../../Annotations_figures/snrna_patient_anns_revisions.csv\")\n",
    "head(df)\n",
    "\n",
    "## add sample annotation\n",
    "condition <- df$condition\n",
    "names(condition) <- df$sample_id\n",
    "obj.rna$condition <- stringr::str_replace_all(obj.rna$Sample,\n",
    "                                                condition)\n",
    "\n",
    "## add region\n",
    "region <- df$region\n",
    "names(region) <- df$sample_id\n",
    "obj.rna$region <- stringr::str_replace_all(obj.rna$Sample,\n",
    "                                                region)\n",
    "\n",
    "## add patient_group\n",
    "patient_group <- df$patient_group\n",
    "names(patient_group) <- df$sample_id\n",
    "obj.rna$patient_group <- stringr::str_replace_all(obj.rna$Sample,\n",
    "                                                patient_group)\n",
    "\n",
    "## add global_id\n",
    "global_id <- df$global_ID\n",
    "names(global_id) <- df$sample_id\n",
    "obj.rna$global_id <- stringr::str_replace_all(obj.rna$Sample,\n",
    "                                                global_id)\n",
    "\n",
    "## add rep\n",
    "rep <- as.character(df$rep)\n",
    "names(rep) <- df$sample_id\n",
    "obj.rna$rep <- stringr::str_replace_all(obj.rna$Sample,\n",
    "                                                rep)\n",
    "\n",
    "## add patient\n",
    "patient <- df$patient\n",
    "names(patient) <- df$sample_id\n",
    "obj.rna$patient <- stringr::str_replace_all(obj.rna$Sample,\n",
    "                                                patient)\n",
    "\n",
    "## add patient\n",
    "region_novel <- df$region_novel\n",
    "names(region_novel) <- df$sample_id\n",
    "obj.rna$region_novel <- stringr::str_replace_all(obj.rna$Sample,\n",
    "                                                region_novel)\n",
    "\n",
    "## add patient\n",
    "patient_id <- df$patient_id\n",
    "names(patient_id) <- df$sample_id\n",
    "obj.rna$patient_id <- stringr::str_replace_all(obj.rna$Sample,\n",
    "                                                patient_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cbe31af4-4473-4a62-b573-757b9f3f93bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "## add gene activity matrix\n",
    "gene.activities <- readRDS(\"../../DataIntegration/data/VisiumHeart/GeneScoreMatrix.Rds\")\n",
    "\n",
    "dim(gene.activities)\n",
    "\n",
    "gene.use <- intersect(rownames(gene.activities),\n",
    "                     rownames(obj.rna))\n",
    "\n",
    "gene.activities <- gene.activities[gene.use, colnames(obj.atac)]\n",
    "\n",
    "obj.atac[['GeneActivity']] <- CreateAssayObject(counts = gene.activities)\n",
    "\n",
    "DefaultAssay(obj.atac) <- \"GeneActivity\"\n",
    "\n",
    "obj.atac <- obj.atac %>% \n",
    "        NormalizeData() %>%\n",
    "        FindVariableFeatures() %>%\n",
    "        ScaleData() %>%\n",
    "        RunPCA(verbose = FALSE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "54adc700-1d61-48a5-8a77-1e702a36583a",
   "metadata": {},
   "outputs": [],
   "source": [
    "obj.rna <- obj.rna %>% \n",
    "        FindVariableFeatures()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "281439fa-9c16-49bd-be00-164f6371eedc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Identify anchors\n",
    "transfer.anchors <- FindTransferAnchors(reference = obj.rna, \n",
    "                                        query = obj.atac, \n",
    "                                        features = VariableFeatures(object = obj.rna),\n",
    "                                        reference.assay = \"RNA\", \n",
    "                                        query.assay = \"GeneActivity\", \n",
    "                                        reduction = \"cca\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "749c118e-46ae-432a-b262-a3c600dba93f",
   "metadata": {},
   "outputs": [],
   "source": [
    "celltype.predictions <- TransferData(anchorset = transfer.anchors, \n",
    "                                     refdata = obj.rna$cell_type,\n",
    "                                     weight.reduction = obj.atac[[\"harmony\"]], \n",
    "                                     dims = 1:30)\n",
    "\n",
    "obj.atac <- AddMetaData(obj.atac, metadata = celltype.predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c600c4c5-9cc4-46b1-8b5b-9b98f093667a",
   "metadata": {},
   "outputs": [],
   "source": [
    "p1 <- DimPlot(obj.atac, reduction = \"umap_harmony_v2\", group.by = \"cell_type\",\n",
    "             raster = TRUE, size = 0.1) +\n",
    "    scale_color_manual(values = cols) +\n",
    "    ggtitle(\"snATAC-seq cell type\")\n",
    "    \n",
    "p2 <- DimPlot(obj.atac, reduction = \"umap_harmony_v2\", group.by = \"predicted.id\",\n",
    "             size = 0.1, raster = TRUE) +\n",
    "    scale_color_manual(values = cols) +\n",
    "    ggtitle(\"snATAC-seq predicted label\")\n",
    "\n",
    "options(repr.plot.height = 5, repr.plot.width = 12)\n",
    "\n",
    "p1 + p2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eee7a1a0-bf75-4662-9a21-b95a50e5c3f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "## merge ATAC-RNA\n",
    "# we here impute full transcriptome\n",
    "refdata <- GetAssayData(obj.rna, assay = \"RNA\", slot = \"data\")[gene.use, ]\n",
    "\n",
    "# refdata (input) contains a scRNA-seq expression matrix for the scRNA-seq cells.  imputation\n",
    "# (output) will contain an imputed scRNA-seq matrix for each of the ATAC cells\n",
    "obj.atac[[\"RNA\"]] <- TransferData(anchorset = transfer.anchors, \n",
    "                                  refdata = refdata, \n",
    "                                  weight.reduction = \"cca\")\n",
    "\n",
    "DefaultAssay(obj.atac) <- \"RNA\"\n",
    "\n",
    "obj.rna$tech <- \"RNA\"\n",
    "obj.atac$tech <- \"ATAC\"\n",
    "\n",
    "obj.rna.sub <- subset(obj.rna, features = gene.use)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e056e3bd-cb88-40cd-a671-bc5f512005c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "coembed <- merge(x = obj.atac, y = obj.rna.sub)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b2a10ec-9cfa-4a28-8542-e1706e35aa99",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Finally, we run PCA and UMAP on this combined object, to visualize the co-embedding of both datasets\n",
    "coembed <- coembed %>%\n",
    "    ScaleData(features = gene.use, do.scale = FALSE) %>%\n",
    "    FindVariableFeatures() %>%\n",
    "    RunPCA() %>%\n",
    "    RunUMAP(dims = 1:30, verbose = FALSE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3bf87ed3-2306-46f5-be09-9db3749acc48",
   "metadata": {},
   "outputs": [],
   "source": [
    "p1 <- DimPlot(coembed, group.by = \"tech\") +\n",
    "    xlab(\"UMAP1\") + ylab(\"UMAP2\")\n",
    "\n",
    "p2 <- DimPlot(coembed, group.by = \"cell_type\", label = TRUE) +\n",
    "    xlab(\"UMAP1\") + ylab(\"UMAP2\")\n",
    "\n",
    "p1 + p2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2522888b-0193-4f44-babb-13f25eed13b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "saveRDS(obj.atac, file = \"../data/snATAC.Rds\")\n",
    "saveRDS(obj.rna, file = \"../data/snRNA.Rds\")\n",
    "saveRDS(coembed, file = \"../data/snRNA-snATAC-coembedding.Rds\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c63f93cf-e9aa-4c59-b24a-4cbedf1fb411",
   "metadata": {},
   "outputs": [],
   "source": [
    "sessionInfo()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "4.0.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
